
* Re-save the birthplace code lookup sorted by Birthplace_code, so that it can
* later serve as the using file of a key-based merge on Birthplace_code.
local codes_file "$datatemp/birthplace_codes.dta"
use "`codes_file'", clear
sort Birthplace_code
save "`codes_file'", replace

* Import official Rosstat data on the number of births by year and oblast before 1990.
* The first row of the cell range supplies the variable names; the sheet is
* wide (one numbir<Year> column per year) and is reshaped to one row per
* Birthplace_code-Year pair.
import excel using "$data/rosstat_births_oblast_year.xlsx", clear firstrow sheet("numbirths") cellrange(B4:T87)
reshape long numbir, i(Birthplace_code) j(Year)

* Attach the oblast-year data from the 1989-census file (presumably the
* numwomen_1544 denominator used later -- confirm against that file).
* Explicit merge types replace the old "merge varlist using" syntax: the old
* form required the using file to be pre-sorted by the key and silently paired
* rows when keys were duplicated. "merge 1:1" sorts as needed and stops with an
* error if Birthplace_code-Year is not unique on either side.
* keep(match) nogenerate == "keep if _merge==3" followed by "drop _merge".
merge 1:1 Birthplace_code Year using "$datatemp/numwomen_1544_89cens_official.dta", keep(match) nogenerate

* Attach the birthplace code lookup: many years per oblast in memory, one
* lookup row per Birthplace_code in the using file (enforced by m:1).
merge m:1 Birthplace_code using "$datatemp/birthplace_codes.dta", keep(match) nogenerate

* Restore a deterministic oblast-year order before saving.
sort Birthplace_code Year
save "$datatemp/rosstat_gfr_oblast_year.dta", replace

* Merge in all variables that were previously created
use "$datatemp/rosstat_gfr_oblast_year.dta", clear

* Stack the rows from gfr_90_repl.dta under the pre-1990 panel. For 1990-1993
* the birth count is taken from numbirth, after which numbirth is redundant.
append using "$datatemp/gfr_90_repl.dta"
replace numbir = numbirth if inrange(Year, 1990, 1993)
drop numbirth

* Births per 1000 women age 15 to 44.
gen gfr_official = numbir*1000/numwomen_1544

* Oblast-level variables: many years per Birthplace_code in memory, one row
* per Birthplace_code expected in the using file. "merge m:1" replaces the old
* "merge varlist using" syntax, which needed a pre-sorted using file and paired
* duplicate keys silently; m:1 sorts as needed and errors on duplicate keys.
* keep(match) nogenerate == "keep if _merge==3" followed by "drop _merge".
merge m:1 Birthplace_code using "$datatemp/urb_educ_vars.dta", keep(match) nogenerate

* Rows with a missing Birthplace inherit the value of the earliest year of the
* same oblast (rows are ordered by Year within Birthplace_code).
sort Birthplace_code Year
by Birthplace_code: replace Birthplace = Birthplace[1] if missing(Birthplace)

* Oblast-year covariates, keyed on the oblast name and year.
merge m:1 Birthplace Year using "$datatemp/covars_oblast.dta", keep(match) nogenerate
drop republic Birthplace_rus Col_hs

* Leave the data ordered by oblast name and year, as the downstream save expects
* the same row order the sorted merge used to produce.
sort Birthplace Year
* Label variables
* Stata variable labels hold at most 80 characters. Labels that would exceed
* that limit are shortened here, and the full description is kept as a
* variable note (view with "notes list").
label variable Birthplace_code "Unique numerical code for each oblast"
label variable Year "Year of observation"
label variable Birthplace "Name of an oblast"
label variable numbir "Number of births"
label variable numwomen_1544 "Number of women age 15 to 44"
label variable loc "equals to 1 for an early beneficiary and 2 for a late beneficiary"
label variable gfr_official "General Fertility Rate using official Rosstat data"

* Education variables (all per 1000 people age 10 or older)
label variable College "Number of individuals who have completed college per 1000 people age 10 or older"
label variable College_inc "Individuals with incomplete college per 1000 people age 10 or older"
notes College_inc: Number of individuals who have not finished college per 1000 people age 10 or older
label variable hs_spec "Individuals with specialized (vocational) high school per 1000 people age 10+"
notes hs_spec: Number of individuals who have received a specialized high school education (vocational) per 1000 people age 10 or older
label variable hs_gen "Individuals with general high school education per 1000 people age 10 or older"
notes hs_gen: Number of individuals who have received a general high school education per 1000 people age 10 or older
label variable hs_inc "Individuals with incomplete high school per 1000 people age 10 or older"
notes hs_inc: Number of individuals who have not finished high school per 1000 people age 10 or older
label variable elem "Individuals who completed elementary school per 1000 people age 10 or older"
notes elem: Number of individuals who have completed elementary school per 1000 people age 10 or older

* Urban/rural population of women in 1979
label variable women_urb_79 "Number of women age 15 to 44 living in urban areas in 1979"
label variable women_rur_79 "Number of women age 15 to 44 living in rural areas in 1979"

* Economic covariates
label variable trade "Volume of trade in rubles"
label variable concrete "Amount of concrete production"
label variable brick "Amount of brick production"
label variable meat "Amount of meat production"
label variable timber "Amount of timber production"
label variable canned "Amount of canned goods production"

save "$datatemp/rosstat_allvars.dta", replace


